#!/usr/bin/env python
from __future__ import annotations

import argparse
import http.client
import re
from contextlib import closing
from pathlib import Path

# Absolute directory containing this script.
module_dir = Path(__file__).parent.resolve()
# Module whose ``default_types()`` definition main() rewrites. The ".."
# component is joined after resolve(), so it stays in the path as written.
media_types_py = module_dir / "../src/whitenoise/media_types.py"


def main() -> int:
    """
    Regenerate the ``default_types()`` definition inside ``media_types_py``.

    The freshly generated function source replaces the existing definition
    in the file. With ``--check`` nothing is written; the script only
    reports whether the file would change.

    Returns the process exit code: 1 when ``--check`` is given and the file
    is out of date, 0 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true")
    args = parser.parse_args()

    func_str = get_default_types_function()
    text = media_types_py.read_text()
    new_text = re.sub(
        # Greedy match under DOTALL: spans from the ``def`` line to the last
        # closing brace in the file.
        r"def default_types.*\}",
        # Pass a callable so the generated source is inserted verbatim. A
        # plain replacement string would have its backslashes interpreted
        # as escapes / group references by ``re.sub``.
        lambda _match: func_str,
        text,
        flags=re.DOTALL,
    )
    if new_text == text:
        return 0

    if args.check:
        print("Would write changes")
        return 1

    print(f"Writing {media_types_py}")
    media_types_py.write_text(new_text)
    return 0


# Entries applied on top of the nginx-derived map (get_types_map() merges
# them last, so they win over nginx for the same key). Keys without a
# leading dot are presumably matched as whole file names by the consumer of
# the generated map - confirm against whitenoise's lookup code.
EXTRA_MIMETYPES = {
    # nginx file uses application/javascript, but HTML specification recommends
    # text/javascript:
    ".js": "text/javascript",
    ".md": "text/markdown",
    ".mjs": "text/javascript",
    ".woff": "application/font-woff",
    ".woff2": "font/woff2",
    # The registered media type is "application/pkcs7-mime" (PKCS #7).
    "apple-app-site-association": "application/pkcs7-mime",
    # Adobe Products - see:
    # https://www.adobe.com/devnet-docs/acrobatetk/tools/AppSec/xdomain.html#policy-file-host-basics
    "crossdomain.xml": "text/x-cross-domain-policy",
}


# Source-code template for the generated ``default_types()`` function.
# It is filled in with ``str.format``: ``{entries}`` receives the rendered
# dict entry lines, while the doubled braces ``{{`` / ``}}`` are format
# escapes that come out as the literal braces of the returned dict.
FUNCTION_TEMPLATE = '''\
def default_types() -> dict[str, str]:
    """
    We use our own set of default media types rather than the system-supplied
    ones. This ensures consistent media type behaviour across varied
    environments.  The defaults are based on those shipped with nginx, with
    some custom additions.

    (Auto-generated by scripts/generate_default_media_types.py)
    """
    return {{
{entries}
    }}'''


def get_default_types_function() -> str:
    """
    Render the complete source text of the ``default_types()`` function.

    Every suffix / media type pair from ``get_types_map()`` becomes one
    indented dict entry line; the joined lines are substituted into
    ``FUNCTION_TEMPLATE``.
    """
    rendered_entries = "\n".join(
        f'        "{suffix}": "{media_type}",'  # noqa: B028
        for suffix, media_type in get_types_map().items()
    )
    return FUNCTION_TEMPLATE.format(entries=rendered_entries)


def get_types_map() -> dict[str, str]:
    """
    Build the suffix -> media type mapping, sorted by key.

    The mapping is parsed from the text returned by ``get_nginx_data()`` and
    then overlaid with ``EXTRA_MIMETYPES``, whose entries take precedence.
    """
    declarations = re.findall(r"(\w+/.*?)\s+(.*?);", get_nginx_data())

    collected: dict[str, str] = {}
    for media_type, extension_field in declarations:
        # application/octet-stream is the default media type anyway, so
        # there is no point in listing it explicitly.
        if media_type == "application/octet-stream":
            continue
        # One declaration may list several whitespace-separated extensions.
        collected.update(
            {f".{extension}": media_type for extension in extension_field.split()}
        )

    collected.update(EXTRA_MIMETYPES)
    return dict(sorted(collected.items()))


def get_nginx_data() -> str:
    """
    Download nginx's ``conf/mime.types`` from GitHub and return it as text.

    Raises:
        RuntimeError: if the server responds with a status other than 200.
    """
    # Bound the wait so a stalled connection cannot hang the script forever.
    conn = http.client.HTTPSConnection("raw.githubusercontent.com", timeout=30)
    with closing(conn):
        conn.request("GET", "/nginx/nginx/master/conf/mime.types")
        response = conn.getresponse()
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let an error page be parsed as data.
        if response.status != 200:
            raise RuntimeError(
                "Failed to fetch nginx mime.types: "
                f"HTTP {response.status} {response.reason}"
            )
        return response.read().decode()


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
